library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(scales)

# Script-level placeholders: POP is the population label that make_graph()
# reads for the plot title; TD and s are reassigned inside the population loop.
POP <- ""
TD <- data.frame()
s <- data.frame()
#' Plot Tajima's D along each chromosome for one population.
#'
#' Draws one facet per `CHROM` with a semi-transparent point for every row of
#' `TD`, and overlays per-chromosome horizontal reference lines taken from `s`:
#' `m` (legend key "mean") and `q1` / `q2` (legend key "quantile").
#'
#' @param TD Data frame with columns `CHROM`, `BIN_START` and `TajimaD`.
#' @param s Per-chromosome summary with columns `CHROM`, `m`, `q1` and `q2`.
#' @param pop Label prefixed to the plot title. Defaults to the global `POP`,
#'   looked up when the function is called, so existing two-argument calls
#'   behave as before.
#' @return A ggplot object; nothing is drawn until it is printed or plotted.
make_graph <- function(TD, s, pop = POP) {
  # Pad the shared y axis by half a unit on each side of the observed values.
  # The limits used to be handed to ggplot() itself, which silently ignores
  # unknown arguments, so the padding never took effect. coord_cartesian()
  # zooms without dropping any rows; NULL keeps the default limits when there
  # is nothing finite to measure.
  finite_d <- TD$TajimaD[is.finite(TD$TajimaD)]
  y_limits <- if (length(finite_d) > 0) {
    range(finite_d) + c(-0.5, 0.5)
  } else {
    NULL
  }

  ggplot(data = TD, aes(x = BIN_START, y = TajimaD)) +
    geom_point(shape = 16, colour = alpha("black", 1 / 5)) +
    facet_wrap(~CHROM, scales = "free_x") +
    geom_hline(aes(yintercept = q1, colour = "quantile"), data = s) +
    geom_hline(aes(yintercept = q2, colour = "quantile"), data = s) +
    geom_hline(aes(yintercept = m, colour = "mean"), data = s) +
    # Name the colours so each key keeps its colour regardless of break order.
    scale_colour_manual(
      "",
      breaks = c("mean", "quantile"),
      values = c(mean = "blue", quantile = "red")
    ) +
    # The axis title is passed as the scale name directly; wrapping it in
    # xlab() handed the scale a labels object instead of a string.
    # NOTE(review): the title says Mbp but BIN_START is plotted unscaled --
    # confirm the column's units, or rescale the axis labels.
    scale_x_continuous(name = "Chromosome Position (Mbp)") +
    ylab("Tajima's D") +
    coord_cartesian(ylim = y_limits) +
    ggtitle(paste0(pop, " Tajima's D by Chromosome")) +
    theme(
      plot.background = element_rect(colour = "black", fill = NA),
      legend.position = c(0.75, 0.12)
    )
}
# All input files are read relative to this directory.
# NOTE(review): setwd() ties the script to one machine's layout; it is kept
# because code outside this view may rely on the working directory.
setwd("~/MurrayXsan/Bioinformatics/working_dir/extract/TajD/")

# For each population: load the 22 per-chromosome Tajima's D tables, print a
# per-chromosome summary (mean, sd, min, max, 1% and 99% quantiles) and draw
# the faceted plot.
for (POP in c("AXIOM", "OMNI", "CEU", "CHB", "CHS", "GBR", "YRI")) {
  print(POP)

  # Read every chromosome file first and bind once; growing TD with rbind()
  # inside a loop re-copies the accumulated rows on every iteration.
  per_chrom <- lapply(1:22, function(i) {
    read.table(file = paste0(POP, i, ".taj_d"), header = TRUE)
  })
  TD <- do.call(rbind, per_chrom)

  # m, q1 and q2 are the columns make_graph() draws as reference lines; the
  # unnamed summaries keep their auto-generated column names.
  s <- TD %>%
    group_by(CHROM) %>%
    summarise(
      m = mean(TajimaD),
      sd(TajimaD),
      min(TajimaD),
      max(TajimaD),
      q1 = quantile(TajimaD, 0.01),
      q2 = quantile(TajimaD, 0.99)
    )
  print(s)
  plot(make_graph(TD, s))
}
## [1] "AXIOM"
## Source: local data frame [22 x 7]
## 
##    CHROM        m sd(TajimaD) min(TajimaD) max(TajimaD)        q1       q2
## 1      1 1.058279    1.221099     -2.41387      5.25771 -1.731181 3.857003
## 2      2 1.137210    1.296195     -2.57669      5.17214 -1.841302 3.917500
## 3      3 1.049447    1.249424     -2.43823      5.00940 -1.799287 3.809078
## 4      4 1.195485    1.277510     -2.40939      5.12566 -1.808687 4.032744
## 5      5 1.218779    1.229530     -2.52491      4.90840 -1.641137 3.899892
## 6      6 1.122992    1.246940     -2.34988      5.00515 -1.728202 3.885709
## 7      7 1.154358    1.288883     -2.37870      5.09934 -1.853535 3.872779
## 8      8 1.241303    1.293007     -2.47176      4.96928 -1.754783 4.079039
## 9      9 1.017197    1.222401     -2.38939      5.04482 -1.655562 3.932412
## 10    10 1.130169    1.244277     -2.50805      4.80763 -1.757709 3.882264
## ..   ...      ...         ...          ...          ...       ...      ...

## [1] "OMNI"
## Source: local data frame [22 x 7]
## 
##    CHROM        m sd(TajimaD) min(TajimaD) max(TajimaD)        q1       q2
## 1      1 1.179641    1.313091     -2.57726      5.38479 -1.794255 4.150716
## 2      2 1.279367    1.396452     -2.57055      5.05386 -1.971803 4.139439
## 3      3 1.155094    1.358149     -2.48649      5.43189 -1.946865 4.015615
## 4      4 1.310548    1.366403     -2.47182      5.13952 -2.022585 4.195300
## 5      5 1.377433    1.325676     -2.46235      5.77829 -1.816413 4.138347
## 6      6 1.367835    1.337405     -2.55347      5.52324 -1.915355 4.122679
## 7      7 1.378357    1.354550     -2.65073      5.36996 -1.938387 4.147289
## 8      8 1.405680    1.348083     -2.52308      5.13749 -1.906609 4.121874
## 9      9 1.151282    1.296751     -2.30045      5.33263 -1.690985 4.218668
## 10    10 1.294337    1.341359     -2.44675      5.09618 -1.804936 4.176304
## ..   ...      ...         ...          ...          ...       ...      ...

## [1] "CEU"
## Source: local data frame [22 x 7]
## 
##    CHROM         m sd(TajimaD) min(TajimaD) max(TajimaD)        q1
## 1      1 0.8565231    1.100705     -2.64798      5.06537 -1.743686
## 2      2 0.9277225    1.096790     -2.58216      4.60973 -1.656872
## 3      3 0.9974389    1.105686     -2.47956      5.41122 -1.689959
## 4      4 1.0271121    1.142481     -2.45799      4.96464 -1.698714
## 5      5 1.0042110    1.081483     -2.26605      4.34600 -1.528696
## 6      6 1.0331145    1.080514     -2.49485      4.49871 -1.609358
## 7      7 1.0103342    1.099849     -2.34492      4.67497 -1.700800
## 8      8 0.9288049    1.135500     -2.46394      5.18678 -1.824635
## 9      9 0.7950899    1.037591     -2.37685      4.92409 -1.485879
## 10    10 0.9824180    1.096389     -2.24969      5.07686 -1.644561
## ..   ...       ...         ...          ...          ...       ...
## Variables not shown: q2 (dbl)

## [1] "CHB"
## Source: local data frame [22 x 7]
## 
##    CHROM        m sd(TajimaD) min(TajimaD) max(TajimaD)        q1       q2
## 1      1 1.410159    1.304827     -2.24732      5.40691 -1.463001 4.190590
## 2      2 1.513834    1.330909     -2.44921      5.28161 -1.683473 4.240492
## 3      3 1.491101    1.295906     -2.30389      5.62477 -1.584205 4.156571
## 4      4 1.669091    1.293744     -2.47443      5.49978 -1.538758 4.398708
## 5      5 1.613264    1.275396     -2.35769      5.43509 -1.453832 4.295589
## 6      6 1.609703    1.222567     -2.35785      5.29033 -1.336052 4.290732
## 7      7 1.627516    1.273563     -2.30162      5.12753 -1.502277 4.215970
## 8      8 1.683986    1.279263     -2.39828      5.13955 -1.568622 4.294316
## 9      9 1.314781    1.246523     -2.23407      5.01544 -1.321288 3.963602
## 10    10 1.569031    1.247372     -2.34669      5.65041 -1.503554 4.199709
## ..   ...      ...         ...          ...          ...       ...      ...

## [1] "CHS"
## Source: local data frame [22 x 7]
## 
##    CHROM        m sd(TajimaD) min(TajimaD) max(TajimaD)        q1       q2
## 1      1 1.411644    1.301160     -2.25120      5.43979 -1.461741 4.217358
## 2      2 1.517107    1.334690     -2.41266      5.17482 -1.623727 4.268971
## 3      3 1.488766    1.300325     -2.41213      5.95105 -1.623663 4.217388
## 4      4 1.686436    1.305082     -2.14187      5.67374 -1.546137 4.457816
## 5      5 1.587065    1.296075     -2.30040      5.38741 -1.449092 4.351365
## 6      6 1.634972    1.255553     -2.32052      5.48588 -1.370659 4.309805
## 7      7 1.642527    1.270168     -2.18890      5.37590 -1.515004 4.257370
## 8      8 1.675892    1.286911     -2.29523      5.41696 -1.535959 4.324278
## 9      9 1.333981    1.259306     -2.43656      4.72201 -1.237503 4.021613
## 10    10 1.607835    1.247713     -1.99432      5.67257 -1.457846 4.243254
## ..   ...      ...         ...          ...          ...       ...      ...

## [1] "GBR"
## Source: local data frame [22 x 7]
## 
##    CHROM         m sd(TajimaD) min(TajimaD) max(TajimaD)        q1
## 1      1 0.8899343    1.099841     -2.49439      4.86906 -1.704978
## 2      2 0.9469137    1.108518     -2.56737      4.48601 -1.660077
## 3      3 1.0240108    1.111766     -2.56121      5.25840 -1.682188
## 4      4 1.0734862    1.160448     -2.45368      4.90476 -1.648888
## 5      5 1.0330567    1.065542     -2.43175      4.91534 -1.472810
## 6      6 1.0672095    1.086707     -2.42006      4.85198 -1.610954
## 7      7 1.0505273    1.105998     -2.54973      4.81216 -1.737564
## 8      8 0.9614845    1.136591     -2.45889      5.00832 -1.867437
## 9      9 0.8202681    1.043071     -2.37884      4.53003 -1.508160
## 10    10 1.0098594    1.065343     -2.36969      4.90353 -1.454549
## ..   ...       ...         ...          ...          ...       ...
## Variables not shown: q2 (dbl)

## [1] "YRI"
## Source: local data frame [22 x 7]
## 
##    CHROM         m sd(TajimaD) min(TajimaD) max(TajimaD)        q1
## 1      1 0.4247375   0.7073689     -1.79954      4.06479 -1.116038
## 2      2 0.4552821   0.6976486     -1.90543      3.91914 -1.067957
## 3      3 0.4951910   0.7040228     -1.74635      4.32211 -1.019574
## 4      4 0.5211350   0.7533990     -1.90023      3.87774 -1.104760
## 5      5 0.4479188   0.7088040     -1.89752      4.03198 -1.078202
## 6      6 0.5091487   0.7569150     -1.70024      4.11953 -1.094817
## 7      7 0.4804271   0.7383283     -1.62320      4.33519 -1.120619
## 8      8 0.4628361   0.7014090     -1.80970      3.41125 -1.011462
## 9      9 0.4123545   0.6821319     -1.77277      3.42126 -1.051369
## 10    10 0.4961932   0.7249085     -1.87367      3.64488 -1.044511
## ..   ...       ...         ...          ...          ...       ...
## Variables not shown: q2 (dbl)